pacman::p_load(tidyverse, data.table, broom, latex2exp,modelsummary,kableExtra,ggpubr,estimatr)
options("modelsummary_format_numeric_latex" = "plain")
rm(list=ls())
################################################################################


####################  Agronomic productivity pasture index
df = fread("../../data/agronomy/clean/faogaez_argentinabrazil.csv.gz")
df_fao = df[crop %in% c('pasture','grass','alfalfa','grass','maize','soybean'),]
df_p = df_fao[crop %in% c('pasture'),]


####################  Calf weight
df = fread("../../data/prices/clean/cepea.csv.gz")[, year:= as.integer(format(as.Date(date, format="%m/%d/%Y"),"%Y"))]
df = df %>% drop_na(average_weight_kg)
df_w = df[year<=2019, .(average_weight_calf_kg=mean(average_weight_kg), average_weight_calf_t=mean(average_weight_kg)/1000 ), by = .(year)] 


####################  Stocking rates 

for(country in c('argentina','brazil')){
  df_l = fread(paste0('../../data/landuse/clean/landuse_',country,'_county.csv.gz')) 
  df_c = fread(paste0('../../data/production/clean/cattlestock_',country,'_county_census.csv.gz'))
  if(country =='argentina'){
    df_c[, cattle:=total_cattlestock]
    df_ar = merge(df_l[pasture>0, list(year, county_id, pasture)], df_c, by=c('year','county_id'))
    df_ar[,country:='Argentina']
  }else{
    df_l[, pasture:=pasture_natural+pasture_planted]
    df_c[, cattle:=total_cattlestock]
    df_br_all = merge(df_l[pasture>0, list(year, county_id, pasture)], df_c, by=c('year','county_id'))
    df_br_all[,country:='Brazil']
  }
}

##### By establishment size
size_bin_list = c("0_1_ha","1_2_ha","2_5_ha","5_10_ha","10_20_ha","20_50_ha","50_100_ha","100_200_ha","200_500_ha","500_ha")
size_bin_list = c("2_5_ha","5_10_ha","10_20_ha","20_50_ha","50_100_ha","100_200_ha","200_500_ha","500_ha")

df_sr = fread(paste0('../../data/size/clean/brazil_stocking_rates.csv.gz'))
df_sr = df_sr[size_bin %in% size_bin_list,]

df = df_sr[variable %in% c("0_1_y","1_2_y","2_y") & group_type=='activity' & group_item=='total' & end_use=='total',]
df = df[, list(county_id, size_bin, value)]
df = df[, lapply(.SD, sum, na.rm=TRUE), by = .(county_id, size_bin)]
df[, cattle:=value]
df_c = df

pasture_list = c("pasture_natural","pasture_planted_degraded","pasture_planted")
df = df_sr[variable=='area_ha' & group_type=='land_use' & group_item %in% pasture_list  & end_use=='total',]
df = df[, list(county_id, size_bin, value)]
df = df[, lapply(.SD, sum, na.rm=TRUE), by = .(county_id, size_bin)]
df[, pasture:=value]
df_l = df

df = merge(df_l[pasture>0, list(county_id, size_bin, pasture)], df_c[cattle>0, list(county_id, size_bin, cattle)], by=c('county_id','size_bin'))
df[,country:='Brazil']
df[,year:=2006]

df_br_size = df[size_bin %in% c('20_50_ha'), ][, list(year, country, county_id, pasture, cattle)]
df_br_size_sum = df[, list(year, country, county_id, pasture, cattle )][, lapply(.SD, sum, na.rm=TRUE), by = .(year, country, county_id)]

df_br = df_br_all

##### Stocking rate data
df_s = rbind(df_ar[cattle>0, list(year, country, county_id, cattle, pasture)], 
             df_br[cattle>0, list(year, country, county_id, cattle, pasture)])
df_s[, cattle_per_ha := cattle/pasture]
df_u = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))
for(spatial_id_b in c('amc_id','microregion_id','mesoregion_id') ){
  df = merge(df_s[country=='Brazil'],df_u[,c('county_id','amc_id','microregion_id','mesoregion_id')], by=c('county_id'))
  setnames(df, old=spatial_id_b, new='spatial_id')
  df_s_b = df[,list(year, country, spatial_id, cattle_per_ha)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, country, spatial_id)]
  df_s_a = df_s[country=='Argentina'][,spatial_id:=county_id][,list(year, country, spatial_id, cattle_per_ha)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, country, spatial_id)]
  df_srates = rbind(df_s_a,df_s_b)
  write.csv(df_srates, gzfile(paste0("../../data/agronomy/clean/stockingrates_argentinabrazil_",spatial_id_b,".csv.gz")), row.names = FALSE)
}


################### Projection

year_list = c(2002, 2008, 2018, 1995, 2006, 2017)
df_s=df_s[year %in% year_list,]
df = merge(df_s,df_w ,by=c('year'))
df_y = df[, .(cattle_per_ha=mean(cattle_per_ha), cattle_h=mean(cattle), pasture_ha=mean(pasture), weight_t=mean(average_weight_calf_t) ), by = .(county_id)]
df_wide = merge(df_fao,df_y,by=c('county_id'))

crop_list = c('pasture','grass','alfalfa','maize','soybean')
crop_list = append(crop_list,append(crop_list,crop_list))
xlab_list = c('pasture index','grass index','alfalfa yield (t/ha)','maize yield (t/ha)','soybean yield (t/ha)')
xlab_list = append(xlab_list,append(xlab_list,xlab_list))
ylab_list = c('cattle/ha','cattle/ha','cattle/ha','cattle/ha','cattle/ha')
ylab_list = append(ylab_list,append(ylab_list,ylab_list))


################### Regression
df = df_fao[, yield:=int_mean, ][crop %in% c('pasture','grass','alfalfa','maize','soybean') & yield>0, ][, list(county_id, state_id, region, country, crop, yield)]
df_x = dcast(df, county_id + state_id + region + country ~ crop,  value.var = "yield")
df = merge(df_x,df_y,by=c('county_id'))
df[, c('x_var','y_var','w_var') := .( pasture, cattle_per_ha, pasture_ha ) ]

#Remove outliers
x_lq=quantile(df$x_var,0.25)
x_uq=quantile(df$x_var,0.75)
x_iqr = IQR(df$x_var)
y_lq=quantile(df$y_var,0.25)
y_uq=quantile(df$y_var,0.75)
y_iqr = IQR(df$y_var)
df = df[x_var > x_lq-x_iqr*1.5 & x_var < x_uq+x_iqr*1.5,] 
df = df[y_var > y_lq-y_iqr*1.5 & y_var < y_uq+y_iqr*1.5,]
df = df[maize>0 & soybean>0,]

#Specifications
m1 = lm_robust(y_var ~ x_var, data=df, weights=w_var)
m2 = lm_robust(y_var ~ x_var + maize, data=df, weights=w_var)
m3 = lm_robust(y_var ~ x_var + soybean, data=df, weights=w_var)
m4 = lm_robust(y_var ~ x_var + maize + soybean, data=df, weights=w_var)
msummary(list(m1,m2,m3,m4), estimate = "{estimate}{stars}", gof_omit = 'R2 Pseudo|AIC|BIC|Log.Lik.')

#Export table
results_tab <- msummary(list(m1,m2,m3,m4),
                        coef_map = c('(Intercept)'='intercept','x_var'='pasture yield (index)', 'maize'= 'maize yield (t/ha)', 'soybean'= 'soybean yield  (t/ha)'),
                        estimate = "{estimate}{stars}",
                        gof_omit = 'RMSE|R2 Pseudo|R2 Adj.|AIC|BIC|Log.Lik.|F', escape=F,
                        output='latex',
                        label="x",
                        title = "\\label{tab: pasture productivity fit}Correlation between stocking rates and pasture productivity index.") %>%
  add_header_above(c(" " = 1, "Dependent variable: Cattle head/ha" = 4)) 
kableExtra::save_kable(results_tab, file = "../../output/tables/appendix/table1_pastureproductivity.tex")

#Save final measure
final_model = m1
c1 = summary(final_model)$coefficients[1]
c2 = summary(final_model)$coefficients[2]
w1 = mean(df_w$average_weight_calf_t)

df_f1 = df_p
df_f1[, int_mean:= (c1+c2*int_mean)*w1]
df_f1$unit = 'tonne per hectare'

df_f2 = df_p
df_f2[, int_mean:= (c1+c2*int_mean)]
df_f2$unit = 'head per hectare'

df_f = rbind(df_f1,df_f2)
write.csv(df_f, gzfile(paste0("../../data/agronomy/clean/pastureproductivity_argentinabrazil.csv.gz")), row.names = FALSE)


